/* 
 * Copyright (C) 2006, Dung-Bang Tsai
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * ( If you want to use this library commercially, feel free to
 *   contact me. For a fee, I can sell you the code without the GPL
 *   license, so that you can use it in your product without
 *   publishing your source code. )
 *
 * Authors:
 *	Tsai, Dung-Bang <dbtsai@gmail.com>
 *			
*			2006/03/05		at NCKU physics
 */

#ifndef _TDBConvertEncode_h
#define _TDBConvertEncode_h
#include "enum_code_page.h"
#include "TDBConvertEngine.h"

class TDBdetect_code_page
{
    /*將來源或目地的raw char的資料用SetSrcChar接上，
   ex若要由char轉ucs4，則將原始char[]array接上，然後測試data屬性
        或已知，然後目的sting定義為TDBstring, 需用SetResultString接上。
    　然後使用ConvertFromSrctoResultString去轉成ucs4, 要注意的是，
    　他的回傳值為讀入幾個raw data，因為有可能被截斷而後面有缺陷。
    
        同理，若有一TDBstring要轉成char[] raw data, 則需先將代轉的sting
        用SetResultString接上，然後ConvertFromResultStringtoSrc即可轉回來。
        
        記得執行完上面在執行下面時，上面的原始資料會被破壞，要小心使用。
    */
	public:
		TDBdetect_code_page()
		{test_n_bytes=0;}
		void SetSrcChar(unsigned char* src_buf)
		{src = src_buf;}
		void SetSrcBytes(size_t src_bytes)
		{src_n_bytes = src_bytes;}
		void SetTestBytes(size_t test_bytes)
		{test_n_bytes= test_bytes;}
		void SetOutputString(TDBstring& mystring){output_string = &mystring;}
 
		// Define in enum_code_page
        size_t ConvertFromSrctoResultString(int code_page); 
		
        bool IsBinaryData();
		bool IsASCII();
// Unicode BOM test function	
		int 	UnicodeBOM_test();			// The return value is defined in enum_code_page.h	
		bool IsUTF8_BOM_test();
		bool IsUTF16LE_BOM_test();
		bool IsUTF16BE_BOM_test();
		bool IsUTF32LE_BOM_test();
		bool IsUTF32BE_BOM_test();

// Code page signature test function
		int Unicode_signature_test();
		bool UTF8_signature_test();
		bool UTF16LE_signature_test();
		bool UTF16BE_signature_test();
		bool UTF32LE_signature_test();
		bool UTF32BE_signature_test();

// Local code page test	
		int Chinese_code_test();
		
    private:	
		unsigned char *src;
        TDBstring *output_string;
// Only input the bytes you want test, 
// generally ony 20k could termin the code page
		size_t src_n_bytes;	
		size_t test_n_bytes;  // if equal to zero, test all bytes.
    
    //######################## Convert charset ####################################

	// Unicode series.
    
    //Return how many bytes outbuf use
	static size_t a_UCS4toUTF32BE(const unsigned int& src, unsigned char *outbuf); 
    static size_t a_UCS4toUTF32LE(const unsigned int& src, unsigned char *outbuf);
    static size_t a_UCS4toUTF16BE(const unsigned int& src, unsigned char *outbuf);
    static size_t a_UCS4toUTF16LE(const unsigned int& src, unsigned char *outbuf);
    static size_t a_UCS4toUTF8(const unsigned int& src, unsigned char *outbuf);
    
    //Return how many bytes input buffer read, and the last argument 
    // is the source string's len, for avoid memory lock.
    // 而若回傳-n, 則是src剩餘空間為len小於該字所需讀入的大小(len < n)。為了避免讀到非法記憶體區域
    // 所以直接跳過終止。若回傳0, 則是有錯誤，跳到下個byte再嘗試。
    static int a_UTF8toUCS4       (const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_UTF32BEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_UTF32LEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_UTF16BEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_UTF16LEtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    
// Local charset
    //Big5 series
	static int a_Big5toUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_Big5UAOtoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_Big52003toUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
    static int a_Big5HKSCStoUCS4(const unsigned char* src, unsigned int& outbuf, size_t len);
};

#endif
